import requests #导入requests包
import  re #导入re正则表达 库
# Scrape the member list page of the Chinese Academy of Sciences (CASAD),
# save the raw HTML to "test.html", and print the text of every anchor whose
# content is a single run of word characters (the member names on this page).

url = "https://casad.cas.cn/ysxx2022/ysmd/qtys/"  # page to crawl
dic = {
    "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Mobile Safari/537.36 Edg/126.0.0.0"
}  # spoofed browser User-Agent

# A timeout keeps the script from hanging forever on an unresponsive server.
resp = requests.get(url=url, headers=dic, timeout=10)
try:
    # Fail loudly on 4xx/5xx instead of saving and parsing an error page.
    resp.raise_for_status()
    print("访问成功")

    # When the server omits the charset, requests falls back to ISO-8859-1 for
    # text responses, which would garble Chinese text; use the encoding
    # detected from the body in that case.
    if resp.encoding is None or resp.encoding.lower() == "iso-8859-1":
        resp.encoding = resp.apparent_encoding

    # Decode once while the connection is still open; reused below.
    html = resp.text
finally:
    resp.close()  # release the connection

with open("test.html", mode="w", encoding="utf-8") as f:
    f.write(html)
print("over")

# Extract anchor texts from the downloaded page with a regular expression.
# NOTE: an earlier, more specific pattern (department / count / name) was
# compiled here and immediately overwritten, so only this pattern ever ran.
obj = re.compile(r'<a href=".*?">(?P<r1>\w+)</a>', re.S)
result1 = obj.finditer(html)

for it in result1:
    print(it.group("r1"))

